import matplotlib
import parselmouth
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import librosa
from librosa.display import specshow, waveplot
import pandas as pd
from matplotlib import gridspec
from IPython.display import Audio
import scipy.signal as sig
# Apply seaborn's default styling to all subsequent matplotlib figures.
sns.set_theme()
# Analysis window within the recording, in seconds.
start_time = 270
end_time = 300
# Separated instrument stems and the stereo mix of the same performance.
sarod_audio_path = 'Data/NIR_PrB_Jhinjhoti_2Gats_Sarod.wav'
tabla_audio_path = 'Data/NIR_PrB_Jhinjhoti_2Gats_Tabla.wav'
mix_audio_path = 'Data/NIR_PrB_Jhinjhoti_2Gats_StereoMix.wav'
# Manual annotations: metric-cycle beat times and selected onsets (Rupak taal).
cycle_file = 'Data/NIR_PrB_jhinjhoti_2Gats_Metre_Rupak.csv'
onset_file = 'Data/NIR_PrB_Jhinjhoti_2Gats_Onsets_Selected_Rupak.csv'
We load only the relevant part of the audio file here. Audio is loaded in mono, and the sample rate is left unchanged.
The pitch contours are extracted using software called Praat (accessed from Python via the `parselmouth` library).
# Load the analysis window of the sarod stem: mono, native sample rate.
sarod_audio, sarod_sr = librosa.load(sarod_audio_path, sr=None, mono=True,
                                     offset=start_time, duration=end_time - start_time)
# Read the cycle (beat) annotations and locate the rows inside the window.
cycle_df = pd.read_csv(cycle_file)
index_values = cycle_df.loc[(cycle_df['Time'] >= start_time) & (cycle_df['Time'] <= end_time)].index.values
# Keep one extra annotated beat on each side (clamped to the table bounds) so
# subdivisions near the window edges are still generated.
first_row = max(index_values[0] - 1, 0)
last_row = min(index_values[-1] + 2, cycle_df.shape[0])
annotated_beats = cycle_df.iloc[first_row:last_row]
# Split every interval between consecutive annotated beats into 7 equal parts
# (each annotated beat carries 7 divisions) and keep the six interior matra
# positions, rounded to two decimal places.
matras = []
beat_times = annotated_beats['Time'].values
for left, right in zip(beat_times[:-1], beat_times[1:]):
    matras.extend(np.around(np.linspace(left, right, num=7, endpoint=False), 2)[1:])
Extract sarod and tabla onsets from the annotation file
# Load the annotated onsets and keep only those inside the analysis window.
onset_vals = pd.read_csv(onset_file)

def _in_window(column):
    # Boolean mask: rows whose time in `column` lies within [start_time, end_time].
    return (onset_vals[column] >= start_time) & (onset_vals[column] <= end_time)

sarod_onset_vals = onset_vals.loc[_in_window('Inst')]   # sarod onsets ('Inst' column)
tabla_onset_vals = onset_vals.loc[_in_window('Tabla')]  # tabla onsets
Use the sarod and tabla audio to generate spectrograms
# Spectrograms are computed from the separated stems; the stereo mix is loaded
# for playback. All three files are loaded mono at their native sample rate,
# restricted to the analysis window.
sarod_audio, sarod_sr = librosa.load(sarod_audio_path, sr=None, mono=True, offset=start_time, duration = end_time - start_time)
tabla_audio, tabla_sr = librosa.load(tabla_audio_path, sr=None, mono=True, offset=start_time, duration = end_time - start_time)
mix_audio, mix_sr = librosa.load(mix_audio_path, sr=None, mono=True, offset=start_time, duration = end_time - start_time)
# STFT params: 40 ms window, 10 ms hop, FFT length rounded up to the next
# power of two. NOTE(review): sizes are derived from mix_sr but applied to the
# sarod/tabla signals -- assumes all three files share one sample rate; confirm.
winsize = int(np.ceil(mix_sr*40e-3))
hopsize = int(np.ceil(mix_sr*10e-3))
nfft = int(2**np.ceil(np.log2(winsize)))
# Extract spectrograms. |X| is a *magnitude* (amplitude) spectrogram, so convert
# with amplitude_to_db (20*log10); the previous power_to_db call treated the
# magnitudes as power values and compressed the dB range by a factor of two.
f_sarod,t_sarod,X_sarod = sig.stft(sarod_audio, fs=sarod_sr, window='hann', nperseg=winsize, noverlap=(winsize-hopsize), nfft=nfft)
X_sarod_dB = librosa.amplitude_to_db(np.abs(X_sarod), ref = np.max, amin=1e-5)
f_tabla,t_tabla,X_tabla = sig.stft(tabla_audio, fs=tabla_sr, window='hann', nperseg=winsize, noverlap=(winsize-hopsize), nfft=nfft)
X_tabla_dB = librosa.amplitude_to_db(np.abs(X_tabla), ref = np.max, amin=1e-5)
Play the audio being analysed
Audio(mix_audio, rate=mix_sr)
Plotting the spectrograms of tabla and sarod
# Three stacked panels sharing one time axis: stereo-mix waveform (top),
# sarod spectrogram (middle), tabla spectrogram (bottom).
fig = plt.figure(figsize=(24, 7))
specs = fig.add_gridspec(3, 1, height_ratios = [2, 3, 3])
axs = [fig.add_subplot(specs[0, 0]), fig.add_subplot(specs[1, 0]), fig.add_subplot(specs[2, 0])]
#plot the waveform
# NOTE(review): librosa.display.waveplot was removed in librosa 0.10 (renamed
# waveshow) -- this call assumes an older librosa; confirm the pinned version.
waveplot(mix_audio, mix_sr, ax=axs[0])
axs[0].set(title='Waveform')
#plot spectograms
specshow(X_sarod_dB, x_axis='time', y_axis='linear', sr=sarod_sr, fmax=sarod_sr//2, hop_length=hopsize, ax=axs[1], cmap='Blues')
axs[1].set(ylabel='Frequency (Hz)', xlabel='', title='Spectogram - Sarod')
specshow(X_tabla_dB, x_axis='time', y_axis='linear', sr=tabla_sr, fmax=tabla_sr//2, hop_length=hopsize, ax=axs[2], cmap='Oranges')
axs[2].set(ylabel='Frequency (Hz)', xlabel='', title='Spectogram - Tabla')
# Overlay annotated cycle beats as solid purple lines on the waveform panel.
# Annotation times are absolute within the recording, so shift by start_time
# to match the 0-based plot axis.
for i, beat in annotated_beats.iterrows():
    # plot annotated beats
    if beat['Time'] >= start_time and beat['Time'] <= end_time:
        axs[0].axvline(beat['Time']-start_time, linestyle='-', c='purple')
# Overlay the interpolated matra subdivisions as dashed purple lines.
for matra in matras:
    # plot beats in the middle
    if matra >= start_time and matra <= end_time:
        axs[0].axvline(matra-start_time, linestyle='--', c='purple')
# Mark the annotated onsets on the corresponding spectrogram panel.
for _, onset in sarod_onset_vals.iterrows():
    axs[1].axvline(onset['Inst']-start_time, linestyle='--', c='red')
for _, onset in tabla_onset_vals.iterrows():
    axs[2].axvline(onset['Tabla']-start_time, linestyle='--', c='blue')
# Align all panels to the bottom panel's time axis; only the bottom panel
# shows tick labels, relabelled back to absolute recording time.
axs[2].set_xlim(0, end_time-start_time)
axs[2].set_xticks(np.linspace(0, end_time-start_time, 10))
axs[2].set_xticklabels(np.around(np.linspace(0, end_time-start_time, 10) + start_time, 2))
axs[1].set_xticks([])
axs[1].set_xlim(axs[2].get_xlim())
axs[0].set_xticks([])
axs[0].set_xlim(axs[1].get_xlim())
# Restrict both spectrogram views to 0-3 kHz with sparse frequency ticks.
axs[1].set_ylim(0, 3000)
axs[1].set_yticks([0, 1e3, 2e3])
axs[1].set_yticklabels(['0', '1k', '2k'])
axs[2].set_ylim(0, 3000)
axs[2].set_yticks([0, 1e3, 2e3])
axs[2].set_yticklabels(['0', '1k', '2k'])
fig.tight_layout()
Audio mixed with clicks at the sarod and tabla onset points (lower pitch: tabla; higher pitch: sarod)
# Click tracks at the annotated onset times, shifted so 0 s is the start of
# the excerpt. Sarod clicks are high-pitched (3 kHz), tabla clicks low (1 kHz).
# `times` is passed by keyword: the positional form was removed in librosa
# 0.10, and the keyword works in all versions. The tabla track is sized from
# the tabla stem (the original used the sarod stem's length, which was correct
# only because all stems share the same window and sample rate).
click_track_sarod = librosa.clicks(times=sarod_onset_vals['Inst'].values-start_time, sr=mix_sr, length=len(sarod_audio), click_freq=3000)
click_track_tabla = librosa.clicks(times=tabla_onset_vals['Tabla'].values-start_time, sr=mix_sr, length=len(tabla_audio), click_freq=1000)
# Mix excerpt plus both click tracks, for auditory verification of the onsets.
Audio(0.7*mix_audio + 0.15*click_track_sarod + 0.15*click_track_tabla, rate=mix_sr)
Audio with just sarod clicks
Audio(0.8*sarod_audio + 0.2*click_track_sarod, rate=sarod_sr)
Audio with just tabla clicks
Audio(0.8*tabla_audio + 0.2*click_track_tabla, rate=sarod_sr)